import scrapy
from lxml import etree
from 爬虫.eastmoney.eastmoney.items import news_daily, yearly_financial_report


class yearly_financial_report_spider(scrapy.Spider):
    """Scrape the performance-report table from the eastmoney start URL.

    Each ``<tr>`` found in the table body under the ``dataview-center`` /
    ``dataview-body`` containers is converted into one
    ``yearly_financial_report`` item. A field is set to ``None`` when its
    XPath matches nothing in that row.
    """

    name = 'yearly_financial_report'
    allowed_domains = ['data.eastmoney.com']
    start_urls = ['https://data.eastmoney.com/bbsj/202312/yjbb.html']
    custom_settings = {
        # Both pipelines must live in ONE mapping: a dict literal keeps only
        # the last value for a repeated key, so listing 'ITEM_PIPELINES'
        # twice silently dropped the first pipeline.
        'ITEM_PIPELINES': {
            '爬虫.eastmoney.eastmoney.pipelines.MysqlPipeline': 200,
            '爬虫.eastmoney.eastmoney.pipelines.ConsolePipeline': 300,
        },
    }

    # XPath selecting every data row of the report table.
    _ROW_XPATH = (
        "//div[@class='dataview-center']//div[@class='dataview-body']//tbody//tr"
    )

    # (item field, XPath relative to a <tr>) pairs, in assignment order.
    _FIELD_XPATHS = (
        # Earnings per share (yuan)
        ('earnings_per_share', './td[5]/text()'),
        # Total operating revenue
        ('total_revenue', './td[6]/text()'),
        # Same period last year.
        # NOTE(review): reads the bare text of td[7], the same cell whose
        # <span> feeds growth_rate below -- confirm the column is right.
        ('last_year', './td[7]/text()'),
        # Year-over-year growth
        ('growth_rate', './td[7]/span/text()'),
        # Quarter-over-quarter growth
        ('quarterly_growth_rate', './td[8]/span/text()'),
        # Net profit
        ('net_profit', './td[9]/text()'),
        # Same period last year (net profit).
        # NOTE(review): reads the bare text of td[11], the same cell whose
        # <span> feeds quarterly_profit_growth_rate -- confirm the column.
        ('last_year_net_profit', './td[11]/text()'),
        # Year-over-year growth (net profit)
        ('profit_growth_rate', './td[10]/span/text()'),
        # Quarter-over-quarter growth (net profit)
        ('quarterly_profit_growth_rate', './td[11]/span/text()'),
        # Net assets per share
        ('net_assets_per_share', './td[12]/text()'),
        # Return on net assets.
        # NOTE(review): this XPath has no column index, so it takes the first
        # td/a/span text found anywhere in the row -- confirm the intended
        # column.
        ('net_assets_return_rate', './td/a/span/text()'),
        # Announcement date
        ('announcement_date', './td[18]/span/text()'),
    )

    @staticmethod
    def _first_text(row, xpath):
        """Return the first result of ``xpath`` on ``row``, or None if empty."""
        # Evaluate the expression once instead of once for the emptiness
        # check and again for the value.
        matches = row.xpath(xpath)
        return matches[0] if matches else None

    def parse(self, response, **kwargs):
        """Yield one ``yearly_financial_report`` item per table row.

        Args:
            response: The downloaded page; its ``text`` is parsed with lxml.
            **kwargs: Accepted for callback-signature compatibility; unused.

        Yields:
            A new item for each ``<tr>`` matched by ``_ROW_XPATH``, with every
            field in ``_FIELD_XPATHS`` set to the first matching text node or
            ``None``.
        """
        tree = etree.HTML(response.text)

        for tr in tree.xpath(self._ROW_XPATH):
            # Build a fresh item for every row: yielding one shared mutable
            # item would let later rows overwrite values that downstream
            # consumers may still be holding a reference to.
            item = yearly_financial_report()
            for field, xpath in self._FIELD_XPATHS:
                item[field] = self._first_text(tr, xpath)
            yield item












